In [54]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [55]:
# Scrape the Italian-Wikipedia "Regione (Italia)" page and parse all of its HTML tables.
URL = 'https://it.wikipedia.org/wiki/Regione_(Italia)'
res = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as if it were data.
res.raise_for_status()
# Passing literal HTML text to read_html is deprecated in newer pandas; wrap it in a buffer.
tables = pd.read_html(io.StringIO(res.text))
# NOTE(review): the table is picked by position; a later cell expects it to carry the
# 'Regione' and 'Popolazione (ab.)' columns. The index will shift if the page layout changes.
dt = tables[13]
In [56]:
def dewhite(x):
    """Return all digits found in ``x`` concatenated into a single string.

    Every non-digit character (spaces, thousands separators, units, ...) is
    dropped, e.g. ``"4 905 854"`` becomes ``"4905854"``.

    Parameters
    ----------
    x : str
        Text containing a number split up by non-digit characters.

    Returns
    -------
    str
        The digits of ``x`` in their original order; ``""`` if there are none.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and population columns, under English names.
dt2 = dt[['Regione', 'Popolazione (ab.)']].copy()
dt2.columns = ['Region', 'Pop']

# Drop every non-digit character from the population strings, then convert to int.
# The pattern is a raw string because '\d' in a plain literal is an invalid escape sequence.
dt2['Pop'] = dt2['Pop'].apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
In [57]:
# Download the cumulative per-region CSV published by the Dipartimento della Protezione Civile.
csv_response = requests.get(
    "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv",
    timeout=30,
)
# Stop here on an HTTP error rather than feeding an error page to the CSV parser.
csv_response.raise_for_status()
s = csv_response.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# Latest value of the 'data' (timestamp) column present in the download.
cdate = dat['data'].max()

md("Currently data as of date: {}".format(cdate))
Out[57]:

Currently data as of date: 2020-10-21T17:00:00


 

What's in the original dataframe?

In [58]:
# Render the raw (Italian) column names of the downloaded frame as Markdown.
md(f"All column names: {list(dat.columns)}")
Out[58]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note']

In [59]:
# Italian -> English column names. Renaming by name rather than by position keeps this
# cell working when the upstream CSV gains or reorders columns; columns not listed here
# (e.g. 'casi_testati', 'note') keep their original names.
COLUMN_NAMES_EN = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}

df = dat.drop(columns=['stato', 'codice_regione']).rename(columns=COLUMN_NAMES_EN)

# Attach the population of each region.
# NOTE(review): this is an inner join on the region name, so any region spelled
# differently in the two sources is dropped silently - confirm the row count.
df = pd.merge(df, dt2, on='Region')

# Truncate the timestamp to a calendar date; keep it both as a column and as a
# DatetimeIndex (later cells use either one).
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# Negative daily counts are replaced by their absolute value.
df['NewPositives'] = np.abs(df['NewPositives'])

dat.tail(5)
Out[59]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... variazione_totale_positivi nuovi_positivi dimessi_guariti deceduti casi_da_sospetto_diagnostico casi_da_screening totale_casi tamponi casi_testati note
5056 2020-10-21T17:00:00 ITA 19 Sicilia 38.115697 13.362357 565 83 648 7202 ... 353 562 5551 389 8879.0 4911.0 13790 614264 437540.0 NaN
5057 2020-10-21T17:00:00 ITA 9 Toscana 43.769231 11.255889 503 76 579 11660 ... 693 866 12006 1221 20011.0 5455.0 25466 950782 639007.0 NaN
5058 2020-10-21T17:00:00 ITA 10 Umbria 43.106758 12.388247 152 20 172 2783 ... 280 350 2403 95 2183.0 3270.0 5453 262392 154074.0 NaN
5059 2020-10-21T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 42 5 47 784 ... 106 111 1155 146 1913.0 219.0 2132 35990 23483.0 NaN
5060 2020-10-21T17:00:00 ITA 5 Veneto 45.434905 12.338452 439 56 495 10938 ... 1177 1422 24550 2282 23338.0 14927.0 38265 2178114 849385.0 NaN

5 rows × 21 columns


 

Variable names to English and their explanation

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total number of currently positive cases (hospitalised patients + home confinement)
  • NewPositives : Number of new positive cases (total cases of the current day - total cases of the previous day)
  • TotalCases : Total (cumulative) number of positive cases
  • NoOfTests : Tests performed
In [60]:
# Preview the last five rows of the prepared frame.
df.tail(5)
Out[60]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives NewPositives Recovered Deaths Diagnostico Screening TotalCases NoOfTests casi_testati note Pop
Date
2020-10-17 2020-10-17 Veneto 45.434905 12.338452 328 43 371 8369 8740 602 774 24064 2247 22758.0 12293.0 35051 2135650 832701.0 NaN 4905854
2020-10-18 2020-10-18 Veneto 45.434905 12.338452 378 44 422 9003 9425 685 800 24170 2256 22849.0 13002.0 35851 2145935 836440.0 NaN 4905854
2020-10-19 2020-10-19 Veneto 45.434905 12.338452 396 44 440 9405 9845 420 502 24253 2255 22902.0 13451.0 36353 2150361 838391.0 NaN 4905854
2020-10-20 2020-10-20 Veneto 45.434905 12.338452 459 51 510 9746 10256 411 490 24319 2268 22987.0 13856.0 36843 2158487 841548.0 NaN 4905854
2020-10-21 2020-10-21 Veneto 45.434905 12.338452 439 56 495 10938 11433 1177 1422 24550 2282 23338.0 14927.0 38265 2178114 849385.0 NaN 4905854

 

(In the graphs below, click a region in the legend to show or hide it; double-click a region to display it alone.)

In [61]:
# Work on a copy: later cells add derived columns to `df2`, and a plain alias
# would silently mutate `df` after it has already been displayed.
df2 = df.copy()

# One line per region, daily new positives on a linear axis.
fig = px.line(
    df2,
    x=df2.index,
    y="NewPositives",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Daily new cases, absolute numbers")
fig.show()
In [62]:
# 7-day moving average of daily new cases, computed separately for each region.
# The frame stacks all regions, so an ungrouped rolling window would average
# rows that belong to different regions.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the rows after 1 March 2020.
recent = df2[df2.index > '2020-3-1']
fig = px.line(recent, x=recent.index, y="MovAv7", color="Region", hover_name="Region",
              render_mode="svg", log_y=False)
fig.update_layout(title="1-week MA of daily new cases")
fig.show()
In [63]:
# Daily new cases per million inhabitants ...
df2['NewPos_pc'] = df2['NewPositives'] / df2['Pop'] * 1000_000

# ... smoothed with a 7-day moving average inside each region, so that no
# window spans the boundary between two regions.
df2['NewPos_pc'] = (
    df2.groupby('Region')['NewPos_pc']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the rows after 1 March 2020.
recent = df2[df2.index > '2020-3-1']
fig = px.line(recent, x=recent.index, y="NewPos_pc", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="1-week MA of daily new cases, per million")
fig.show()
In [64]:
# Current intensive-care patients scaled to one million inhabitants of the region.
df2['IC_pc'] = df2['IC'].div(df2['Pop']).mul(1_000_000)

fig = px.line(
    df2,
    x="Date",
    y="IC_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current intensive care patients, per million")
fig.show()
In [65]:
# Currently hospitalised patients scaled to one million inhabitants of the region.
df2['Hosp_pc'] = df2['HospTotal'].div(df2['Pop']).mul(1_000_000)

fig = px.line(
    df2,
    x="Date",
    y="Hosp_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current hospitalized, per million")
fig.show()
In [66]:
# Separate copy so the daily-deaths column does not end up in `df2`.
df3 = df2.copy()

# Cumulative deaths minus the previous row's value within the same region;
# the first row of each region has no predecessor and becomes NaN.
previous_deaths = df3.groupby(['Region'])['Deaths'].shift()
df3['NewDeaths'] = df3['Deaths'] - previous_deaths

fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
)
fig.update_layout(title="Daily number of new deaths, absolute numbers")
fig.show()
In [67]:
# Tests performed per day: cumulative count minus the previous row of the same region.
df2['NewNoOfTests'] = df2['NoOfTests'] - df2.groupby(['Region'])['NoOfTests'].transform('shift')

# Weekly positivity rate in percent: positives over the last 7 days divided by tests
# over the same 7 days, computed inside each region. Windowing both numerator and
# denominator per region keeps windows from mixing regions and makes the ratio match
# the "1-week" title (the ratio of 7-day sums equals the ratio of 7-day means).
weekly_positives = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).sum())
)
weekly_tests = (
    df2.groupby('Region')['NewNoOfTests']
       .transform(lambda s: s.rolling(window=7).sum())
)
# Non-positive weekly test counts are masked to NaN so the ratio never divides by zero.
df2['New_per_test'] = weekly_positives / weekly_tests.where(weekly_tests > 0) * 100

# Restrict the plot to the selected northern regions.
north = df2[df2['Region'].isin(['Lombardia', 'Veneto', 'Emilia-Romagna', 'Piemonte', 'Liguria'])]
fig = px.line(north, x=north.index, y="New_per_test", color="Region", hover_name="Region",
              render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="1-week MA % of positive tests in Northern regions")
fig.show()
In [68]:
# Cumulative deaths scaled to one million inhabitants of the region.
deaths_share = df2['Deaths'] / df2['Pop']
df2['Deaths_per_mio'] = deaths_share * 1_000_000

fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_mio",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
)
fig.update_layout(title="Cumulative number of deaths, per million")
fig.show()
In [69]:
# Daily change in currently-positive cases per million inhabitants ...
df2['Change_per_mio'] = df2['VariationOfPositives'] / df2['Pop'] * 1000_000
# ... smoothed with a 7-day moving average inside each region, so that no
# window spans the boundary between two regions.
df2['Change_per_mio'] = (
    df2.groupby('Region')['Change_per_mio']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Rows after 1 March 2020, leaving out Valle d'Aosta (as stated in the title).
shown = df2[(df2.index > '2020-3-1') & (df2['Region'] != "Valle d'Aosta")]
fig = px.line(shown, x='Date', y="Change_per_mio", color="Region", hover_name="Date")
fig.update_layout(title="1-week MA of change in positive cases, per million (excl. Valle d'Aosta)")
fig.show()

 

Italy as a whole

All regions aggregated

In [70]:
df2 = df

# National totals per day. Only the three plotted metrics are summed: adding up every
# column would also "sum" region names, notes, dates, populations and moving averages,
# which is meaningless and behaves differently across pandas versions.
# The index is dropped first because 'Date' is both the index name and a column,
# which makes grouping by the label ambiguous.
national_metrics = ['NewPositives', 'IC', 'HospTotal']
df_sum = (
    df2.reset_index(drop=True)
       .groupby('Date')[national_metrics]
       .sum()
       .reset_index()
)

# Long format: one row per (Date, metric) so each metric becomes its own line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=national_metrics)

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True,
              line_shape='spline')
fig.update_layout(title="Number of daily new positive cases, current IC patients and total hospitalized")
fig.show()
In [ ]: